**********************************************************************************************
* This file merges the World Bank Aid Location Data, the GED Conflict Data and the Control Variables on the ADM1 and ADM2 level.
**********************************************************************************************

* global macros
/*
** Lennart
global data "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\Raw Data"


** Melvin
global data "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\Raw Data"
*/
/*
**Kai
global data "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\Raw Data"
*/
/*
*Kai 2 Home
global data "C:\Users\kai_g\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "C:\Users\kai_g\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "C:\Users\kai_g\Dropbox\Geocoded Aid and Conflict\Raw Data"
*/



*************************************************************************************
*************************               ADM1         ********************************
*************************************************************************************
global cwd "$data\Aid\2017_11_14_WB"
cd "$cwd"


******************************Prepare further data, move to create part**************

* Import global battle related deaths (ADM1/ADM2 are the same as variable is global
import excel using "$rawdata\Global Trends\ucdp-brd-conf-181.xlsx", clear firstrow
collapse (sum) bdbest, by(year)
renvars bdbest year / brd_global transaction_year
save brd_glob.dta, replace


**************************************************************************************************************
******************SCAD ADM1*********************************************************************************
**************************************************************************************************************

*reading and cleaning dataset
import delimited "$data\Intersected\scadafrica_adm1.txt", clear
rename iso iso_country
rename hasc_1 hasc_adm1

gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0 r id_1)
drop c r           
label var ID_adm1 "Unique identifier for ADM1 region"
***Keep only events where we have at least information about the region
drop if locnum==-99  |locnum==7 |locnum==11 | locnum==12


*drop events with start date prior to 1990 as their eventtype is always NA
drop if styr < 1990

*generating dummies for demonstrations, riots, strikes and violent events
generate demo = 0
replace demo = 1 if etype == 1 | etype == 2
generate riot = 0
replace riot = 1 if etype == 3 | etype == 4
generate strike = 0
replace strike = 1 if etype == 5 | etype == 6
generate pro_gov_violence = 0
replace pro_gov_violence = 1 if etype == 7
generate pro_gov_vio_leth = 0
replace pro_gov_vio_leth = 1 if etype == 7 & ((ndeath>0 & ndeath!=.) | ndeath==-88 | ndeath==-77)
generate pro_gov_vio_nleth = 0
replace pro_gov_vio_nleth = 1 if etype == 7 & (ndeath==0 | ndeath==-99 )
generate anti_gov_violence = 0
replace anti_gov_violence = 1 if etype == 8
generate extra_gov_violence = 0
replace extra_gov_violence = 1 if etype == 9
generate intra_gov_violence = 0
replace intra_gov_violence = 1 if etype == 10
generate total_violence = 0
replace total_violence = 1 if etype == 7 | etype == 8 | etype == 9 | etype == 10
generate total_rds  = 0
replace total_rds = 1 if strike == 1 | riot == 1 | demo == 1

gen repress_all = 0 if repress==0
replace repress_all = 1 if repress==1 | repress==2
replace repress_all = 1 if pro_gov_violence == 1 // code pro government violence (repression) as repression as want to capture government initiated repression
replace repress_all = . if etype == 10
gen repress_l = 0 if repress==0 | repress==1
replace repress_l = 1 if repress==2
replace repress_l = 1 if pro_gov_vio_leth == 1
replace repress_l = . if etype == 10
gen repress_nl = 0 if repress==0 | repress==2
replace repress_nl = 1 if repress==1
replace repress_nl = 1 if pro_gov_vio_nleth == 1
replace repress_nl = . if etype == 10


gen repress_all_no89 = 0 if repress==0
replace repress_all_no89 = 1 if repress==1 | repress==2
replace repress_all_no89 = 1 if pro_gov_violence == 1 // code pro government violence (repression) as repression as want to capture government initiated repression
replace repress_all_no89 = . if etype == 8 |  etype == 9 | etype == 10
gen repress_l_no89 = 0 if repress==0 | repress==1
replace repress_l_no89 = 1 if repress==2
replace repress_l_no89 = 1 if pro_gov_vio_leth == 1
replace repress_l_no89 = . if etype == 8 |  etype == 9 | etype == 10
gen repress_nl_no89 = 0 if repress==0 | repress==2
replace repress_nl_no89 = 1 if repress==1
replace repress_nl_no89 = 1 if pro_gov_vio_nleth == 1
replace repress_nl_no89 = . if etype == 8 |  etype == 9 | etype == 10


*generating year and event dummies
quietly{
forvalues jahr = 1990(1)2016 {
gen y`jahr' = 0
replace y`jahr' = 1 if eyr >= `jahr' & styr <= `jahr'
 
gen N_demo`jahr' = y`jahr'*demo
gen N_riot`jahr' = y`jahr'*riot
gen N_strike`jahr' = y`jahr'*strike
gen N_pro_gov_violence`jahr' = y`jahr'*pro_gov_violence
gen N_pro_gov_vio_leth`jahr' = y`jahr'*pro_gov_vio_leth
gen N_pro_gov_vio_nleth`jahr' = y`jahr'*pro_gov_vio_nleth
gen N_anti_gov_violence`jahr' = y`jahr'*anti_gov_violence
gen N_extra_gov_violence`jahr' = y`jahr'*extra_gov_violence
gen N_intra_gov_violence`jahr' = y`jahr'*intra_gov_violence
gen N_total_violence`jahr' = y`jahr'*total_violence
gen N_total_rds`jahr' = y`jahr'*total_rds
gen N_repress_all`jahr' = y`jahr'*repress_all
gen N_repress_nl`jahr' = y`jahr'*repress_nl
gen N_repress_l`jahr' = y`jahr'*repress_l
gen N_repress_all_no89`jahr' = y`jahr'*repress_all_no89
gen N_repress_nl_no89`jahr' = y`jahr'*repress_nl_no89
gen N_repress_l_no89`jahr' = y`jahr'*repress_l_no89
}
}

*reshaping dummies so as to have values in long form and then collapsing so as to have number of events per year per adm1 region
quietly {
	reshape long  N_demo N_riot N_strike N_pro_gov_violence N_pro_gov_vio_leth N_pro_gov_vio_nleth N_anti_gov_violence N_extra_gov_violence N_intra_gov_violence N_total_violence N_total_rds N_repress_all N_repress_nl N_repress_l N_repress_all_no89 N_repress_nl_no89 N_repress_l_no89, i(fid) j(year)

	*Collapsing to get number of deaths per adm1 per year
	drop if ID_adm1 == " "
	collapse (sum) N_* (first) iso_country countrynam, by (ID_adm1 year)
}
*generating event dummies per adm1 region per year
gen  D_demo = 0
gen  D_riot = 0
gen  D_strike = 0
gen  D_pro_gov_violence = 0
gen  D_pro_gov_vio_leth = 0
gen  D_pro_gov_vio_nleth = 0
gen  D_anti_gov_violence = 0
gen  D_extra_gov_violence = 0
gen  D_intra_gov_violence = 0
gen  D_total_violence = 0
gen  D_total_rds = 0
gen  D_repress_all = 0
gen  D_repress_nl = 0
gen  D_repress_l = 0
gen  D_repress_all_no89 = 0
gen  D_repress_nl_no89 = 0
gen  D_repress_l_no89 = 0

replace D_demo = 1 if  N_demo != 0
replace D_riot = 1 if  N_riot != 0
replace D_strike = 1 if  N_strike != 0
replace D_pro_gov_violence = 1 if  N_pro_gov_violence != 0
replace D_pro_gov_vio_leth = 1 if  N_pro_gov_vio_leth != 0
replace D_pro_gov_vio_nleth = 1 if  N_pro_gov_vio_nleth != 0
replace D_anti_gov_violence = 1 if  N_anti_gov_violence != 0
replace D_extra_gov_violence = 1 if  N_extra_gov_violence != 0
replace D_intra_gov_violence = 1 if  N_intra_gov_violence != 0
replace D_total_violence = 1 if  N_total_violence != 0
replace D_total_rds = 1 if N_total_rds !=0
replace D_repress_all = 1 if N_repress_all !=0
replace D_repress_nl = 1 if N_repress_nl !=0
replace D_repress_l = 1 if N_repress_l !=0
replace D_repress_all_no89 = 1 if N_repress_all_no89 !=0
replace D_repress_nl_no89 = 1 if N_repress_nl_no89 !=0
replace D_repress_l_no89 = 1 if N_repress_l_no89 !=0

*Adding labels
quietly {
label variable year "Year"
label variable iso_country "Three letter ISO code for a country"
label variable countrynam "Name of the country"
label variable ID_adm1 "Unique code for each adm1 region"
label variable N_demo "Number of demonstartions in adm1 region in given year"
label variable N_riot "Number of riots in adm1 region in given year"
label variable N_strike "Number of strikes in adm1 region in given year"
label variable N_pro_gov_violence "Number of pro Government violent events in adm1 region in given year"
label variable N_pro_gov_vio_leth "Number of lethal pro Government violent events in adm1 region in given year"
label variable N_pro_gov_vio_nleth "Number of non-lethal pro Government violent events in adm1 region in given year"
label variable N_anti_gov_violence "Number of anti Government violent events in adm1 region in given year"
label variable N_extra_gov_violence "Number of extra Government violent events in adm1 region in given year"
label variable N_intra_gov_violence "Number of intra Government violent events in adm1 region in given year"
label variable N_total_violence "Number of pro-,anti-,extra- and intra-gvmt violent events in adm1 region in given year"
label variable N_total_rds "Number of riots, strikes or demos in adm1 region in given year"
label variable N_repress_all "Number of gov repression in adm1 region in given year"
label variable N_repress_nl "Number of non-lethal gov repression in adm1 region in given year"
label variable N_repress_l "Number of lethal gov repression in adm1 region in given year"

label variable D_demo "Where there demonstartions in adm1 region in given year"
label variable D_riot "Where there riots in adm1 region in given year"
label variable D_strike "Where there strikes in adm1 region in given year"
label variable D_pro_gov_violence "Where there pro Government violent events in adm1 region in given year"
label variable D_pro_gov_vio_leth "Were there lethal pro Government violent events in adm1 region in given year"
label variable D_pro_gov_vio_nleth "Were there non-lethal pro Government violent events in adm1 region in given year"
label variable D_anti_gov_violence "Where there anti Government violent events in adm1 region in given year"
label variable D_extra_gov_violence "Where there extra Government violent events in adm1 region in given year"
label variable D_intra_gov_violence "Where there intra Government violent events in adm1 region in given year"
label variable D_total_violence "Where there pro-,anti-,extra- and intra-gvmt violent events in adm1 region in given year"
label variable D_total_rds "Where there riots, strikes or demos in adm1 region in given year"
label variable D_repress_all "Where there  gov repression in adm1 region in given year"
label variable D_repress_nl "Where there  non-lethal gov repression in adm1 region in given year"
label variable D_repress_l "Where there  lethal gov repression in adm1 region in given year"
}

save "$data\dataprocessed/SCAD_Africa_adm1.dta", replace
*Missing regions are probably those with no intersect, as there was no event.

**************************************************************************************************************
******************SCAD ADM2*********************************************************************************
**************************************************************************************************************

*reading and cleaning dataset
import delimited "$data\Intersected\scadafrica_adm2.txt", clear
rename iso iso_country
rename hasc_2 hasc_adm2

gen c = "c"
gen r = "r"
egen ID_adm2 = concat(c id_0 r id_1 r id_2)
drop c r           
label var ID_adm2 "Unique identifier for ADM1 region"


***Keep only events where we have at least information about the region
drop if locnum==-99  |locnum==7 |locnum==11 | locnum==12

*drop events with start date prior to 1990 as their eventtype is always NA
drop if styr < 1990



*generating dummies for demonstrations, riots, strikes and violent events
{
generate demo = 0
replace demo = 1 if etype == 1 | etype == 2
generate riot = 0
replace riot = 1 if etype == 3 | etype == 4
generate strike = 0
replace strike = 1 if etype == 5 | etype == 6
generate pro_gov_violence = 0
replace pro_gov_violence = 1 if etype == 7
generate pro_gov_vio_leth = 0
replace pro_gov_vio_leth = 1 if etype == 7 & ((ndeath>0 & ndeath!=.) | ndeath==-88 | ndeath==-77)
generate pro_gov_vio_nleth = 0
replace pro_gov_vio_nleth = 1 if etype == 7 & (ndeath==0 | ndeath==-99 )
generate anti_gov_violence = 0
replace anti_gov_violence = 1 if etype == 8
generate extra_gov_violence = 0
replace extra_gov_violence = 1 if etype == 9
generate intra_gov_violence = 0
replace intra_gov_violence = 1 if etype == 10
generate total_violence = 0
replace total_violence = 1 if etype == 7 | etype == 8 | etype == 9 | etype == 10
generate total_rds  = 0
replace total_rds = 1 if strike == 1 | riot == 1 | demo == 1
gen repress_all = 0 if repress==0
replace repress_all = 1 if repress==1 | repress==2
replace repress_all = . if etype == 9 | etype == 10
gen repress_l = 0 if repress==0 | repress==1
replace repress_l = 1 if repress==2
replace repress_l = . if etype == 9 | etype == 10
gen repress_nl = 0 if repress==0 | repress==2
replace repress_nl = 1 if repress==1
replace repress_nl = . if etype == 9 | etype == 10


}
*generating year and event dummies
quietly {
forvalues jahr = 1990(1)2016 {
gen y`jahr' = 0
replace y`jahr' = 1 if eyr >= `jahr' & styr <= `jahr'

gen N_demo`jahr' = y`jahr'*demo
gen N_riot`jahr' = y`jahr'*riot
gen N_strike`jahr' = y`jahr'*strike
gen N_pro_gov_violence`jahr' = y`jahr'*pro_gov_vio_leth
gen N_pro_gov_vio_leth`jahr' = y`jahr'*pro_gov_vio_nleth
gen N_pro_gov_vio_nleth`jahr' = y`jahr'*pro_gov_violence
gen N_anti_gov_violence`jahr' = y`jahr'*anti_gov_violence
gen N_extra_gov_violence`jahr' = y`jahr'*extra_gov_violence
gen N_intra_gov_violence`jahr' = y`jahr'*intra_gov_violence
gen N_total_violence`jahr' = y`jahr'*total_violence
gen N_total_rds`jahr' = y`jahr'*total_rds
gen N_repress_all`jahr' = y`jahr'*repress_all
gen N_repress_nl`jahr' = y`jahr'*repress_nl
gen N_repress_l`jahr' = y`jahr'*repress_l
}
}

*reshaping dummies so as to have values in long form and then collapsing so as to have number of events per year per adm1 region
quietly{
	reshape long  N_demo N_riot N_strike N_pro_gov_violence N_pro_gov_vio_leth N_pro_gov_vio_nleth N_anti_gov_violence N_extra_gov_violence N_intra_gov_violence N_total_violence N_total_rds N_repress_all N_repress_nl N_repress_l, i(fid) j(year)

	*Collapsing to get number of deaths per adm2 per year
	drop if hasc_adm2 == " "
	collapse (sum) N_* (first) iso_country countrynam, by (ID_adm2 year)
}

*generating event dummies per adm1 region per year
{
gen  D_demo = 0
gen  D_riot = 0
gen  D_strike = 0
gen  D_pro_gov_violence = 0
gen  D_pro_gov_vio_leth = 0
gen  D_pro_gov_vio_nleth = 0
gen  D_anti_gov_violence = 0
gen  D_extra_gov_violence = 0
gen  D_intra_gov_violence = 0
gen  D_total_violence = 0
gen  D_total_rds = 0
gen  D_repress_all = 0
gen  D_repress_nl = 0
gen  D_repress_l = 0

replace D_demo = 1 if  N_demo != 0
replace D_riot = 1 if  N_riot != 0
replace D_strike = 1 if  N_strike != 0
replace D_pro_gov_violence = 1 if  N_pro_gov_violence != 0
replace D_pro_gov_vio_leth = 1 if  N_pro_gov_vio_leth != 0
replace D_pro_gov_vio_nleth = 1 if  N_pro_gov_vio_nleth != 0
replace D_anti_gov_violence = 1 if  N_anti_gov_violence != 0
replace D_extra_gov_violence = 1 if  N_extra_gov_violence != 0
replace D_intra_gov_violence = 1 if  N_intra_gov_violence != 0
replace D_total_violence = 1 if  N_total_violence != 0
replace D_total_rds = 1 if N_total_rds !=0
replace D_repress_all = 1 if N_repress_all !=0
replace D_repress_nl = 1 if N_repress_nl !=0
replace D_repress_l = 1 if N_repress_l !=0
}
*Labeling
quietly {
label variable year "Year"
label variable iso_country "Three letter ISO code for a country"
label variable countrynam "Name of the country"
label variable ID_adm2 "Unique code for each adm2 region"
label variable N_demo "Number of demonstartions in adm2 region in given year"
label variable N_riot "Number of riots in adm2 region in given year"
label variable N_strike "Number of strikes in adm2 region in given year"
label variable N_pro_gov_violence "Number of pro Government violent events in adm2 region in given year"
label variable N_pro_gov_vio_leth "Number of lethal pro Government violent events in adm2 region in given year"
label variable N_pro_gov_vio_nleth "Number of non-lethal pro Government violent events in adm2 region in given year"
label variable N_anti_gov_violence "Number of anti Government violent events in adm2 region in given year"
label variable N_extra_gov_violence "Number of extra Government violent events in adm2 region in given year"
label variable N_intra_gov_violence "Number of intra Government violent events in adm2 region in given year"
label variable N_total_violence "Number of pro-,anti-,extra- and intra-gvmt violent events in adm2 region in given year"
label variable N_total_rds "Number of riots, strikes or demos in adm2 region in given year"
label variable N_repress_all "Number of gov repression in adm2 region in given year"
label variable N_repress_nl "Number of non-lethal gov repression in adm2 region in given year"
label variable N_repress_l "Number of lethal gov repression in adm2 region in given year"

label variable D_demo "Where their demonstartions in adm2 region in given year"
label variable D_riot "Where their riots in adm2 region in given year"
label variable D_strike "Where their strikes in adm2 region in given year"
label variable D_pro_gov_violence "Where their pro Government violent events in adm2 region in given year"
label variable D_pro_gov_vio_leth "Were there lethal pro Government violent events in adm2 region in given year"
label variable D_pro_gov_vio_nleth "Were there non-lethal pro Government violent events in adm2 region in given year"
label variable D_anti_gov_violence "Where their anti Government violent events in adm2 region in given year"
label variable D_extra_gov_violence "Where their extra Government violent events in adm2 region in given year"
label variable D_intra_gov_violence "Where their intra Government violent events in adm2 region in given year"
label variable D_total_violence "Where their pro-,anti-,extra- and intra-gvmt violent events in adm2 region in given year"
label variable D_total_rds "Where their riots, strikes or demos in adm2 region in given year"
label variable D_repress_all "Where there  gov repression in adm2 region in given year"
label variable D_repress_nl "Where there  non-lethal gov repression in adm2 region in given year"
label variable D_repress_l "Where there  lethal gov repression in adm2 region in given year"

}
save "$data\dataprocessed/SCAD_Africa_adm2.dta", replace


**************************************************************************************************************
******************ACLED ADM1********************************************************************************
**************************************************************************************************************

*reading and cleaning dataset
{
import delimited "$data\Intersected\ACLED_adm1.txt", clear
rename hasc_1 hasc_adm1
rename iso iso_country
drop if hasc_adm1==" " | hasc_adm1 == ""

gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0 r id_1)
drop c r           
label var ID_adm1 "Unique identifier for ADM1 region"
}

collapse (first) iso_country hasc_adm1 ID_adm1 country admin1 admin2 event_id_c interactio, by(year event_type notes)
*generating dummies for demonstrations, riots, strikes and violent events
{
generate demo = 0
replace demo = 1 if event_type == "Riots/Protests"
generate non_violent = 0
replace non_violent = 1 if event_type == "Non-violent transfer of territory" | event_type == "Headquarters or base established"
generate violent = 0
replace violent = 1 if event_type == "Battle-Government regains territory" | event_type == "Battle-No change of territory" | event_type == "Battle-Non-state actor overtakes territory" | event_type == "Violence against civilians" | event_type == "Remote violence"

gen pro_gov_violence = 0
replace pro_gov_violence = 1 if interactio == 11 | interactio == 12 | interactio == 13 | interactio == 14 | interactio == 15 | interactio == 16 | interactio == 17 | interactio == 18 | interactio == 10
gen anti_gov_violence = 0
replace anti_gov_violence = 1 if interactio == 22 | interactio == 23 | interactio == 24 | interactio == 25 | interactio == 26 | interactio == 27 | interactio == 28 | interactio == 20
gen extra_gov_violence = 0
replace extra_gov_violence = 1 if interactio == 33 | interactio == 34 | interactio == 35 | interactio == 36 | interactio == 37 | interactio == 38 | interactio == 30 | interactio == 44 | interactio == 45 | interactio == 46 | interactio == 47 | interactio == 48 | interactio == 40
gen riot = 0
replace riot = 1 if interactio == 55 | interactio == 56 |  interactio == 57 | interactio == 58 | interactio == 50
gen protest = 0
replace protest = 1 if interactio == 66 | interactio == 67 | interactio == 68 | interactio == 60
}
*generating year and event dummies
quietly {
forvalues jahr = 1997(1)2018 {
gen y`jahr' = 0
replace y`jahr' = 1 if year == `jahr'

gen N_demo`jahr' = y`jahr'*demo
gen N_non_violent`jahr' = y`jahr'*non_violent
gen N_violent`jahr' = y`jahr'*violent

gen N_pro_gov_violence`jahr' = y`jahr'*pro_gov_violence
gen N_anti_gov_violence`jahr' = y`jahr'*anti_gov_violence
gen N_extra_gov_violence`jahr' = y`jahr'*extra_gov_violence
gen N_protest`jahr' = y`jahr'*protest
gen N_riot`jahr' = y`jahr'*riot
}
}
*reshaping dummies so as to have values in long form and then collapsing so as to have number of events per year per adm1 region
quietly {
	keep  event_id_c country admin1 admin2 N_* hasc_adm1 ID_adm1
	reshape long  N_demo N_non_violent N_violent N_pro_gov_violence N_anti_gov_violence N_extra_gov_violence N_protest N_riot, i(event_id_c) j(year)
	collapse (first) event_id_c country admin1 admin2 (sum) N_* , by (ID_adm1 year)
}

*generating event dummies per adm1 region per year
{
generate D_demo = 0
replace D_demo = 1 if N_demo != 0
generate D_non_violent = 0
replace D_non_violent= 1 if N_non_violent != 0
generate D_violent = 0
replace D_violent= 1 if N_violent != 0
gen D_pro_gov_violence = 0
replace D_pro_gov_violence= 1 if N_pro_gov_violence != 0
gen D_anti_gov_violence = 0
replace D_anti_gov_violence = 1 if N_anti_gov_violence != 0
gen D_extra_gov_violence = 0
replace D_extra_gov_violence = 1 if N_extra_gov_violence != 0
gen D_protest = 0
replace D_protest = 1 if N_protest!= 0
gen D_riot = 0
replace D_riot = 1 if N_riot != 0
}
*Adding Labels
quietly {
label variable ID_adm1 "Unique code for adm1 region from adm shapefiles"
label variable event_id_c "Event id including country code (not three letter ISO"
label variable country "Name of the country in which adm1 region is located"
label variable admin1 "Name of admin1 region as in ACLED dataset"
label variable admin2 "Name of admin2 region as in ACLED dataset"
label variable N_demo "Number of demonstrations in given region in given year"
label variable N_non_violent "Number of non-violent events in given region in given year"
label variable N_violent "Number of violent events in given region in given year"
label variable D_demo "Dummy for demonstrations in given region in given year"
label variable D_non_violent "Dummy for non-violent events in given region in given year"
label variable D_violent "Dummy for violent events in given region in given year"
}
save "$data\dataprocessed/ACLED_adm1.dta", replace

**************************************************************************************************************
******************ACLED ADM2********************************************************************************
**************************************************************************************************************

*reading and cleaning dataset
import delimited "$data\Intersected\ACLED_adm2.txt", clear
rename hasc_2 hasc_adm2
rename iso iso_country
drop if hasc_adm2==" " | hasc_adm2 == ""

gen c = "c"
gen r = "r"
egen ID_adm2 = concat(c id_0 r id_1 r id_2)
drop c r           
label var ID_adm2 "Unique identifier for ADM1 region"


collapse (first) iso_country hasc_adm2 ID_adm2 country admin1 admin2 event_id_c interactio, by(year event_type notes)
*generating dummies for demonstrations, riots, strikes and violent events
generate demo = 0
replace demo = 1 if event_type == "Riots/Protests"
generate non_violent = 0
replace non_violent = 1 if event_type == "Non-violent transfer of territory" | event_type == "Headquarters or base established"
generate violent = 0
replace violent = 1 if event_type == "Battle-Government regains territory" | event_type == "Battle-No change of territory" | event_type == "Battle-Non-state actor overtakes territory" | event_type == "Violence against civilians" | event_type == "Remote violence"
gen pro_gov_violence = 0
replace pro_gov_violence = 1 if interactio == 11 | interactio == 12 | interactio == 13 | interactio == 14 | interactio == 15 | interactio == 16 | interactio == 17 | interactio == 18 | interactio == 10
gen anti_gov_violence = 0
replace anti_gov_violence = 1 if interactio == 22 | interactio == 23 | interactio == 24 | interactio == 25 | interactio == 26 | interactio == 27 | interactio == 28 | interactio == 20
gen extra_gov_violence = 0
replace extra_gov_violence = 1 if interactio == 33 | interactio == 34 | interactio == 35 | interactio == 36 | interactio == 37 | interactio == 38 | interactio == 30 | interactio == 44 | interactio == 45 | interactio == 46 | interactio == 47 | interactio == 48 | interactio == 40
gen riot = 0
replace riot = 1 if interactio == 55 | interactio == 56 |  interactio == 57 | interactio == 58 | interactio == 50
gen protest = 0
replace protest = 1 if interactio == 66 | interactio == 67 | interactio == 68 | interactio == 60

*generating year and event dummies
quietly {
forvalues jahr = 1997(1)2018 {
gen y`jahr' = 0
replace y`jahr' = 1 if year == `jahr'

gen N_demo`jahr' = y`jahr'*demo
gen N_non_violent`jahr' = y`jahr'*non_violent
gen N_violent`jahr' = y`jahr'*violent

gen N_pro_gov_violence`jahr' = y`jahr'*pro_gov_violence
gen N_anti_gov_violence`jahr' = y`jahr'*anti_gov_violence
gen N_extra_gov_violence`jahr' = y`jahr'*extra_gov_violence
gen N_protest`jahr' = y`jahr'*protest
gen N_riot`jahr' = y`jahr'*riot
}
}
*reshaping dummies so as to have values in long form and then collapsing so as to have number of events per year per adm2 region
quietly {
	keep  event_id_c country admin1 admin2 N_* hasc_adm2 iso_country ID_adm2
	reshape long  N_demo N_non_violent N_violent N_pro_gov_violence N_anti_gov_violence N_extra_gov_violence N_protest N_riot, i(event_id_c) j(year)
	collapse (first) event_id_c country admin1 admin2 iso_country (sum) N_* , by (ID_adm2 year)
}

*generating event dummies per adm2 region per year
generate D_demo = 0
replace D_demo = 1 if N_demo != 0
generate D_non_violent = 0
replace D_non_violent= 1 if N_non_violent != 0
generate D_violent = 0
replace D_violent= 1 if N_violent != 0
gen D_pro_gov_violence = 0
replace D_pro_gov_violence= 1 if N_pro_gov_violence != 0
gen D_anti_gov_violence = 0
replace D_anti_gov_violence = 1 if N_anti_gov_violence != 0
gen D_extra_gov_violence = 0
replace D_extra_gov_violence = 1 if N_extra_gov_violence != 0
gen D_protest = 0
replace D_protest = 1 if N_protest!= 0
gen D_riot = 0
replace D_riot = 1 if N_riot != 0

*Adding Labels
quietly {
label variable ID_adm2 "unique code for adm2 region from adm shapefiles"
label variable event_id_c "Event id including country code (not three letter ISO"
label variable country "Name of the country in which adm1 region is located"
label variable admin1 "Name of admin1 region as in ACLED dataset"
label variable admin2 "Name of admin2 region as in ACLED dataset"
label variable N_demo "Number of demonstrations in given region in given year"
label variable N_non_violent "Number of non-violent events in given region in given year"
label variable N_violent "Number of violent events in given region in given year"
label variable D_demo "Dummy for demonstrations in given region in given year"
label variable D_non_violent "Dummy for non-violent events in given region in given year"
label variable D_violent "Dummy for violent events in given region in given year"
}
save "$data\dataprocessed/ACLED_adm2.dta", replace


**********ACD Conflict actors to EPR*********************
import delimited "$data\ACD\ACD2EPR-2014-1.csv", delimiter(";") clear 
drop if gwgroupid==.
foreach var in claim recruitment support{
replace `var'=. if `var'==-1
}
label var claim "1=sure ethnic claim, 2=vague claim"
label var recruitment "1=Rebellen recruit from ethnic group, 2=both rebel and gov recruit from group"
label var support "0:Little support 1:Large support 2: mixed"
gen rebelsupport=0
replace rebelsupport=1 if claim==1

gen year = from
expand 2, gen(x)
replace year=to if x==1
drop x
*
gen key= sideb_id+ gwgroupid/1000000
duplicates drop key year,force //Drop exact same observation, no information loss
tsset key year
tsfill
sort key year
foreach var in gwid statename dyadid sideb sideb_id group gwgroupid recruitment claim support rebelsupport {
bys key: carryforward `var', replace
}
gsort key -year
foreach var in gwid statename dyadid sideb sideb_id group gwgroupid recruitment claim support rebelsupport{
bys key: carryforward `var', replace
}
***Create the other years, but do not fill up info about rebel support
tsfill,full
sort key year
foreach var in gwid statename dyadid sideb sideb_id group gwgroupid {
bys key: carryforward `var' if year, replace
}
gsort key -year
foreach var in gwid statename dyadid sideb sideb_id group gwgroupid {
bys key: carryforward `var', replace
}
foreach var in recruitment claim support rebelsupport  {
replace `var'=. if `var'==. //Make sure no info stored if there is nothing
}
collapse (rawsum )rebelsupport, by(gwgroupid year)
label var rebelsupport "GREG group linked to at least one rebel with clear ethnic claim"
replace gwgroupid=gwgroupid/10
saveold "$data/dataprocessed/ACD.dta", replace
use "$data/dataprocessed/ACD.dta",clear

*************************************************************************************************



******************************************************************************************
******************************************************************************************
*********************GREG and EPR Status *************************************************
******************************************************************************************
***import GREG dataset***
	
import excel "$data/GREG.xlsx", firstrow clear
drop if FIPS_CNC80==""
saveold "$data/dataprocessed/GREG1.dta", replace

***

use "$data/dataprocessed/GREG1.dta",clear
forvalues i=1(1)5 {
replace EPRgroup`i' = "x" if EPRgroup`i' == "" /*ansonsten funktioniert der carryforward Befehl nicht wie gewünscht*/
}
gen GREG_country_group = NAME_ENC80 + " - " + G1SHORTC80
label var GREG_country_group "Country & GREG group name"
duplicates drop GREG_country_group, force
reshape long EPRgroup, i(GREG_country_group) j(EPR)
***IMPROVE HERE
drop if EPRgroup == "x" //We want to keep at least every GREG group once, but drop the others if unnecessary

replace EPRgroup = "Turkmens" if EPRgroup== "Turkmen"

*drop if EPR >1 //We want to keep at least every GREG group once, but drop the others if unnecessary
*duplicates drop  GREG_country_group, force
gen year = 1960
expand 2, gen(x)
replace year=2013 if x==1
drop x

tostring(EPR), replace
label var EPR "EPR number from Excel"
gen GREG_EPR = GREG_country_group + " - "+ EPR
encode GREG_EPR, gen(GREG_EPR_enc) //?
tsset GREG_EPR_enc year
tsfill, full
order GREG_EPR_enc year

foreach var of varlist _all {
bys GREG_EPR_enc: carryforward `var', replace
}

*replace EPRgroup = "" if EPRgroup == "x"
g country_group = NAME_ENC80 + " - " +EPRgroup  //Used to merge with EPR later
order country_group
label var country_group "country+EPR group"
rename group  group_id  //Because this exists in EPR as well
label var group_id "GREG group ID"

*duplicates list NAME_ENC80 G1SHORTC80  year
*Some GREG group have multiple EPR matches. Probably use average later, or highest value of status
*br NAME_ENC80 G1SHORTC80 EPRgroup if MultipleEPR=="x"

saveold "$data/dataprocessed/GREG2.dta", replace
*********************************************************************************


***EPR Datensatz importieren***
import excel "$data/EPR.xlsx", firstrow clear
gen country_group = statename + " - " +group
saveold "$data/dataprocessed/EPR1.dta", replace

***EPR Datensatz aufspannen
clear
gen year =. 
saveold "$data/dataprocessed/EPR2.dta", replace 


quietly{
forvalues a = 1946/2013 {
use "$data/dataprocessed/EPR1.dta", clear
gen year = `a'
keep if inrange(year, from, to) 
saveold  "$data\dataprocessed/EPRX`a'.dta", replace
}
use "$data\dataprocessed/EPR2.dta",clear
forvalues a = 1946/2013 {
append using "$data\dataprocessed/EPRX`a'.dta"
}
}

foreach var of varlist _all {
bys country_group: carryforward `var', replace
gsort +country_group -year
bys country_group: carryforward `var', replace
}
saveold "$data/dataprocessed/EPR2.dta", replace


use "$data/dataprocessed/EPR2.dta", clear
order country_group year 
sort country_group year

drop if year<1960
encode status, generate(status_num)
drop umbrella
/*browse if country_group == country_group[_n+1] & year==year[_n+1]
browse if country_group == "JapanBurakumin"*/
duplicates list country_group year //drop if country_group=="JapanBurakumin" 
duplicates drop country_group year,force //drop if country_group=="JapanBurakumin" 
replace gwgroupid=gwgroupid
*Generate power status over history variable
*merge m:1 gwgroupid year using "$data/dataprocessed\ACD.dta", keepusing(rebelsupport)

/*browse if country_group == country_group[_n+1] & year==year[_n+1]
browse if country_group == "JapanBurakumin"*/
duplicates drop country_group year,force //drop if country_group=="JapanBurakumin" 

*Generate power status over history variable
gen Nogov=0
replace Nogov=1 if status_num==6 | status_num==1
label var Nogov "EPR Status Discriminated or powerless"
gen Nogov_irr=0
replace Nogov_irr=1 if status_num==6 | status_num==1| status_num==3 | status_num==7
label var Nogov_irr "EPR Status Discriminated, powerless, irrelevant or self-excluded"

bys country_group: egen Years_nogov=mean(Nogov)
bys country_group: egen Years_nogov_b=mean(Nogov_irr)
label var Years_nogov "Years EPR Status Discriminated or powerless"
label var Years_nogov_b "Years EPR Status Discriminated or powerless Or irrelevant"

gen Nogov2000=0 if year==2000
replace Nogov2000=1 if (status_num==6 | status_num==1) &year==2000
gen Nogov2010=0 if year==2010
replace Nogov2010=1 if (status_num==6 | status_num==1) &year==2010
gen Nogov2000_b=0 if year==2000
replace Nogov2000_b=1 if (status_num==6 | status_num==1| status_num==3| status_num==7) &year==2000
gen Nogov2010_b=0 if year==2010
replace Nogov2010_b=1 if (status_num==6 | status_num==1| status_num==3 | status_num==7) &year==2010

label var Nogov2000 "EPR Status Discriminated or powerless 2000"
label var Nogov2010 "EPR Status Discriminated, powerless, irrelevant or self-excluded 2010"
label var Nogov2000_b "EPR Status Discriminated or powerless 2000"
label var Nogov2010_b "EPR Status Discriminated, powerless, irrelevant or self-excluded 2010"

gen Leader2000=0 if year==2000
replace Leader2000=1 if (status_num==8| status_num==2 | status_num==5) &year==2000
label var Leader2000 "EPR Status Senior Partner, Dominant or Monopoly 2000"
gen Leader2010=0 if year==2010
replace Leader2010=1 if (status_num==8| status_num==2 | status_num==5) &year==2010
label var Leader2010 "EPR Status Senior Partner, Dominant or Monopoly 2000"
drop from to 
bys statename: egen start_year=min(year)
saveold "$data/dataprocessed/EPR3.dta", replace

*********************************************************************************
***EPR und GREG mergen***
use "$data/dataprocessed/GREG2.dta", clear
merge m:1 country_group year using "$data/dataprocessed/EPR3.dta", keep(1 3)  //All 700 stati (country-group-year) from using are merged, the rest of groups are assumed to not hold any power
replace status="IRRELEVANT" if status==""
replace status_num=6 if status_num==.

*Order variables by status, higher status has lower number
replace status_num=status_num+10 //Only to test later whether anything was assigned wrongly
replace status_num=1 if status=="MONOPOLY" 
replace status_num=2 if status=="DOMINANT"
replace status_num=3 if status=="SENIOR PARTNER"
replace status_num=4 if status=="JUNIOR PARTNER"
replace status_num=5 if status=="SELF-EXCLUSION"
replace status_num=6 if status=="POWERLESS" | status=="IRRELEVANT"
replace status_num=7 if status=="DISCRIMINATED"
replace status_num=8 if status=="STATE COLLAPSE"
cap label define status_numX 1 "MONOPOLY" 2 "DOMINANT"  3 "SENIOR PARTNER" 4 "JUNIOR PARTNER" 5 "SELF-EXCLUSION" 6 "POWERLESS_IRRELEVANT" ///
7 "DISCRIMINATED" 8 "STATE COLLAPSE"
label values status_num status_numX 

*Fill up missing ISO codes
replace ISOC80="KHM" if NAME_ENC80=="Cambodia"
replace ISOC80="EGY" if NAME_ENC80=="Egypt"
replace ISOC80="SLV" if NAME_ENC80=="El Salvador"

*Collapse by GREG group, if there are two EPR groups, assign the higher status to the GREG group(s)
collapse  (min) status_num (first) groupid country_group   G1SHORTC80  status  WBREGIOC80 UNREGION2C80 (mean) Nogov Nogov_irr Years_nogov Years_nogov_b Nogov2000 Nogov2010 Nogov2000_b Nogov2010_b Leader2000 Leader2010  ///
 , by( ISOC80 NAME_ENC80  GREG_country_group group_id year)
drop status
label values status_num status_numX 
decode status_num,gen(status)

order ISOC80 NAME_ENC80 GREG_country_group group_id year, before( status_num)

ren groupid EPR_id //Rename EPR group ID number separetely
keep country_group ISOC80 NAME_ENC80 year group_id EPR_id G1SHORTC80 GREG_country_group status status_num  WBREGIOC80 UNREGION2C80 Nogov Nogov_irr ///
Years_nogov Years_nogov_b Nogov2000 Nogov2010 Nogov2000_b Nogov2010_b Leader2000 Leader2010  
ren  NAME_ENC80  ADM0

ren ISOC80 ISO3
drop if ISO3==""
tostring group_id, generate(groupid_s)
gen key_x=ISO3+groupid_s

duplicates list ISO3 group_id year
label var group_id "GREG group ID"

saveold "$data/dataprocessed/GREGFinal.dta", replace

***************************************************************************************
*******************************XXX Kai: Insert newly intersected GREG adm1 files, check number of observations, directly from intersect in ArcGIS we have 3'286 unique values für adm1 (Reason 10km buffer)?
*GREG ADM1 intersect
import delimited "$data\GADM1_GREG_intersect\r_out\Intersect_adm1_greg_10km.csv", clear     
//There are 3203 unique values for id_adm1  (as opposed to 3609 in original gadm data). Are missing regions those with no ethnic groups, where GREG is empty?
ren g1id group_id
ren iso_1 ISO3
encode  ISO3,gen(code_num) 
keep g1short ISO3 group_id  code_num  id_adm1 xcoord ycoord part_area
collapse ( sum) part_area (first) g1short     xcoord ycoord, by(code_num group_id ISO3 id_adm1)
tostring group_id, generate(groupid_s)
gen key=ISO3+groupid_s+id_adm1
gen year=1960
saveold "$data/dataprocessed/GREG_adm1_a.dta", replace
*
import delimited "$data\GADM1_GREG_intersect\r_out\Intersect_adm1_greg_10km.csv", clear 
ren g1id group_id
ren iso_1 ISO3
encode  ISO3,gen(code_num) 
keep g1short ISO3 group_id  code_num  id_adm1 xcoord ycoord part_area
collapse ( sum) part_area (first) g1short     xcoord ycoord, by(code_num group_id ISO3 id_adm1)
tostring group_id, generate(groupid_s)
gen key=ISO3+groupid_s+id_adm1
gen year=2016
saveold "$data/dataprocessed/GREG_adm1_b.dta", replace
*
use "$data/dataprocessed/GREG_adm1_a.dta",clear
append using "$data/dataprocessed/GREG_adm1_b.dta"
sort ISO3 group_id year
encode  key,gen(key_num)  
tsset key_num year
tsfill, full
foreach var in key group_id ISO3 code_num g1short groupid_s part_area id_adm1{
by key_num:carryforward  `var', replace
}
drop xcoord ycoord
gen key_x=ISO3+groupid_s
*

bys id_adm1 year: egen total_area=total(part_area)
gen share_group=part_area/total_area
gen share_group2=share_group*share_group
bys id_adm1 year: egen frac_temp=total(share_group2)
gen frac=1-frac_temp
drop frac_temp
label var frac "Fractionalization, based on area, 1-sum(area^2)"
bys ISO3: egen median_frac= median(frac)
bys ISO3: egen mean_frac=mean(frac)
label var median_frac "Median level of fractionalization within a country"
label var mean_frac "Mean level of fractionalization within a country"
gen L_frac=0 if frac!=.
replace L_frac=1 if frac>=median_frac
label var L_frac "1 if Fractionalization larger than median of country"
gen H_frac=0 if frac!=.
replace H_frac=1 if frac>=mean_frac
label var H_frac "1 if Fractionalization larger than mean of country"
saveold "$data/dataprocessed/GREG_adm1_c.dta", replace  //This has 7509 cross-sectional observations

****************Merge*****************************************

use "$data/dataprocessed/GREG_adm1_c.dta", clear
*codebook groupid
* So far group id's are not the same in both data sets
merge m:1 ISO3 group_id year using "$data/dataprocessed/GREGFinal.dta", keepusing(status status_num) keep(1 3)
codebook status_num
replace status_num= 6 if status_num==.  //6 == IRRELEVANT
replace status = " POWERLESS_IRRELEVANT" if status==""
replace status = subinstr(status," ","",.) 
replace status = subinstr(status,"-","",.) 
drop if status =="" //Test

local count=1
levelsof(status),local(status) clean 
foreach var in `status'{
disp "`var'"
cap drop temp
gen temp=.
replace temp=1 if status_num==`count'
bys ISO3 id_adm1 year: egen st_`var'=total(temp)
cap drop temp
 local count=`count'+1
}

collapse  (mean) st_DISCRIMINATED st_DOMINANT  st_JUNIORPARTNER st_MONOPOLY st_POWERLESS_IRRELEVANT st_SELFEXCLUSION st_SENIORPARTNER st_STATECOLLAPSE (first)L_frac H_frac frac ,   by( ISO3 id_adm1 year)
ren id_adm1 ID_adm1
encode ID_adm1, gen(ID_adm1_num)
xtset  ID_adm1_num year
saveold "$data/dataprocessed/amd1_greg_epr.dta", replace

*use "$data/dataprocessed/amd1_greg_epr.dta", clear


************************************************************************************************************************************
************************************************************************************************************************************
************************************************************************************************************************************
/*GADM1 Datei enthält keine Duplikate für einige Regionen, die anscheinend Exklaven haben: only 
                                 NAME_1 |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
                             Al Qahirah |          2        7.69        7.69
                                 Alaska |          2        7.69       15.38
                             Kiên Giang |          2        7.69       23.08
                                  Louga |          2        7.69       30.77
         Magallanes y Antártica Chilena |          2        7.69       38.46
                              Ninh Bình |          2        7.69       46.15
                                Nunavut |          3       11.54       57.69
                         Oberösterreich |          2        7.69       65.38
                             Sverdlovsk |          3       11.54       76.92
                             Tamil Nadu |          2        7.69       84.62
                             Vorarlberg |          2        7.69       92.31
                      Western Highlands |          2        7.69      100.00
----------------------------------------+-----------------------------------
                                  Total |         26      100.00
*/

************************************************2. Merge*****************************************************************************




*********************************
*** load world list of regions***
*********************************
use gadm1.dta, clear          
drop if ADM1==""            //Small island states that have no adm1 units, may cause some regions from using not being merged. 
gen transaction_year=1995
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm1 transaction_year
	egen ID_adm1_num = group(ID_adm1)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm1_num transaction_year
	//Expand dataset to match time period from 1995-2012
	tsappend, add(19)
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm1_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace 
	bysort ID_adm1_num (years_reverse): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace
	//Melvin H.L. Wong: 5. clean up
	drop years_reverse ID_adm1_num
	sort ID_adm1 transaction_year
save gadm1_full.dta, replace

*********************************
** 1-Merge-Aid Data)**
*********************************

use gadm1_full.dta, clear
clonevar year=transaction_year //For merging
encode ID_adm1, gen(ID_adm1_num)
xtset  ID_adm1_num year

* IDA       ***XXX Kai_: Do we not need to fill up the missings in the data with zeros? Or are they all outside africa
merge 1:1 ID_adm1 transaction_year using "IDA_disbursement_ADM1_tsfill.dta",   nogen keepusing(WBAID_*) keep(1 3) // 18 mismtach from using are few Aid disbursements not allocated to regions
merge 1:1 ID_adm1 transaction_year using "IDA_disbursement_ADM1_POP_tsfill.dta",  nogen keepusing(WBAID_*) keep(1 3)
* IBRD
merge 1:1 ID_adm1 transaction_year using "IBRD_disbursement_ADM1_tsfill.dta", nogen keepusing(IBRD_*) keep(1 3)
merge 1:1 ID_adm1 transaction_year using "IBRD_disbursement_ADM1_POP_tsfill.dta", nogen keepusing(IBRD_*) keep(1 3)
* China
merge 1:1 ID_adm1 transaction_year using "China_disbursement_ADM1_tsfill.dta", nogen keepusing(CAID_*) keep(1 3)
merge 1:1 ID_adm1 transaction_year using "China_disbursement_ADM1_POP_tsfill.dta", nogen keepusing(CAID_*) keep(1 3)
* India
merge 1:1 ID_adm1 transaction_year using "India_disbursement_ADM1_tsfill.dta", nogen keepusing(IAID_*) keep(1 3)
merge 1:1 ID_adm1 transaction_year using "India_disbursement_ADM1_POP_tsfill.dta", nogen  keepusing(IAID_*) keep(1 3)

*********************************
** 2-Merge-Conflict Data)**
*********************************
* GED Conflict Data 
merge 1:1 ID_adm1 transaction_year using "UCDP_GED_ADM1_tsfill(Ethnic vs Non-Ethnic).dta", nogen keep(1 3) //mismatch from using is soley due to time periods before 1995 and after 2014


**********************Merge with SCAD data********************************************
merge 1:1 ID_adm1 year using "$data\dataprocessed/SCAD_Africa_adm1.dta", nogen keep(1 3)   //SCAD only for Africa, these are the _merge ==1 (master only)  //XXX Fill up missings = 0 (in Africa), check ///
//mismatch from using is soley due to time periods before 1995 and after 2014

*********************************
** 3-Merge-Control Variables)**
*********************************
* elevation
merge m:1 ID_adm1 using "adm1elevation_cleaned.dta", nogen keepusing(elevation_*) keep(1 3) //some mismatch from master dueto small islands with no elevation data
* water bodies
merge m:1 ID_adm1 using "adm1lakesrivers_cleaned.dta", nogen keepusing(D*) keep(1 3)
* borders
merge m:1 ID_adm1 using "adm1borders_cleaned.dta", nogen keepusing(D*) keep(1 3)
* ocean access
merge m:1 ID_adm1 using "adm1oceans_cleaned.dta", nogen keepusing(D*) keep(1 3)
/* xxx Melvin 09.03.2018: Not sure if we should have this. No data does not mean no elevation, for example. Different case for Aid
* xxx Lennart 09.03.2018: We lose 598 observations here. Those are mainly small islands like Malta, Maldives or cities like Monaco. Replacing them as zeros is indeed problematic and keeping them as missing is the better choice in my opinion.
  xxx Kai 15.05.2018: Zeros for small islands might be wrong, but are unproblematic as they are dropped later on.	
*/
foreach var in Dlake Driver Dborder Docean {
replace `var'=0 if `var'==.
}

*  GREG EPR data
merge 1:1 ID_adm1 year using "$data/dataprocessed/amd1_greg_epr.dta", nogen keep(1 3)   //Missing are data where GREG codes have no groups, so they should be treated as missing
label var frac "Fractionalization based on GREG, missing if no group on adm1 polygon (10km buffer)"
label var L_frac "1 if Fractionalization larger than median of country"
label var H_frac "1 if Fractionalization larger than mean of country"
* PRIO GRID Controlls
merge 1:1 ID_adm1 transaction_year using "ADM1PRIOCONTROLS.dta",  nogen

* xxx Lennart 15.05.2018: _merge==1 due to 2014 (is not part of analysis) & _merge==2 due to year<1995 (is not part of analysis either)
* Merge with Nightlights 
merge 1:1 ID_adm1 transaction_year using "lights_adm1.dta" , nogen keep(1 3)

* merge with small country indicator
merge m:1 ISO3 using "d_pop.dta", nogen keep(1 3) // 18 mismtach from using are few Aid disbursements not allocated to regions

* Distance to capital 
merge m:1 ID_adm1 using "ADM1CAPITALCOORD.dta", nogen keep(1 3) //missing from small islands or countries without ADM1 regions in our dataset

* Population
merge 1:1 ID_adm1 transaction_year using "pop_adm1.dta", nogen keep(1 3) // missing from master is small islands mostly (tab ADM0 if _merge==1)

* Merge with steel data for IV for Chinese Aid
merge m:1 transaction_year using "steel_iv.dta", nogen keep(1 3) // missing due to time available from 1999-2013. No problem, as IV for ChinaAid only

* Merge with WB IV data
merge m:1 transaction_year using "alt_iv.dta", nogen keep(1 3) // missing due to time from 1995-2012

* Merge with Chinese aggregate Trade and FDI
merge m:1 transaction_year using "chinese_fdi.dta", nogen keep(1 3) 
merge m:1 transaction_year using "chinese_exports.dta", nogen keep(1 3) 
merge m:1 transaction_year using "chinese_imports.dta", nogen keep(1 3) 
* Merge with Chinese global trade and FDI
merge m:1 transaction_year using "global_fdi.dta", nogen keep(1 3) 
merge m:1 transaction_year using "global_exports.dta", nogen keep(1 3) 
merge m:1 transaction_year using "global_imports.dta", nogen keep(1 3) 

* Number of ethnic groups
merge m:1 ID_adm1 using "$data\GADM1_GREG_intersect\r_out\greg_adm1_groupnumber_10km.dta", nogen  keep(1 3)
merge m:1 ID_adm1 using "$data\GADM1_GREG_intersect\r_out\greg_adm1_groupnumber_1p.dta", // nogen  keep(1 3)

* Merge with Bluhm et al. 2019 instrument factor 1
merge m:1 transaction_year using "$data\Bluhm et al 2019 - instrument\june_2019\bluhmetal2019_instrument.dta", nogen keep(1 3) 
* Merge with Bluhm et al. 2021 US steel production (does not contain data for 2014)
merge m:1 transaction_year using "$data\Bluhm et al 2019 - instrument\september_2021\bluhmetal2021_steel.dta", nogen keep(1 3) 
label var us_steel_indx "US Steel Production based on Series IPN3311A2BS by FRED St. Louis (Bluhm et al.)"
/*
*xxx Melvin 09.03.2018:

We need to discuss about this issue:
Filing out missings with 0 makes sense if we think that the data is comprehensive
For Aid Data we assume comprehensive data for the coded countries
Here we would fill in 0 for countries where Aid Data did not code anything yet

My suggestion. Do not fill out missing data with full dataset but leave missing.
Only fill out the missings if the country itself has been coded. For the aid data
this has been done in the create do file
* xxx Lennart 09.03.2018: I just checked the GED codebook. Missing means zero except for Syria ("The dataset contains 135 181 events. GED 17.1 is a global dataset that covers the entirety of the Globe (excluding Syria) between 1989-01-01 and 2016-12-31."): http://ucdp.uu.se/downloads/ged/ged172.pdf 
*********************************************************************************************
************************************** Fill up missing data *********************************
*********************************************************************************************

* 3-FillUp-Aid)


* Aid Data
foreach var in WBAID_ADM1_LOC_AX WBAID_ADM1_LOC_BX {
replace `var'=0 if `var'==. 
}

* IBRD Data
foreach var in  IBRDdisbcount_ADM1_ADJ IBRDdisbcount_ADM1_ADJ_AX IBRDdisbcount_ADM1_ADJ_BX IBRDdisbcount_ADM1_ADJ_CX IBRDdisbcount_ADM1_ADJ_EX IBRDdisbcount_ADM1_ADJ_FX IBRDdisbcount_ADM1_ADJ_JX IBRDdisbcount_ADM1_ADJ_LX IBRDdisbcount_ADM1_ADJ_TX IBRDdisbcount_ADM1_ADJ_WX IBRDdisbcount_ADM1_ADJ_YX IBRDdisbcount_ADM1 IBRDdisbcount_ADM1_AX IBRDdisbcount_ADM1_BX IBRDdisbcount_ADM1_CX IBRDdisbcount_ADM1_EX IBRDdisbcount_ADM1_FX IBRDdisbcount_ADM1_JX IBRDdisbcount_ADM1_LX IBRDdisbcount_ADM1_TX IBRDdisbcount_ADM1_WX IBRDdisbcount_ADM1_YX IBRD_ADM1_LOC_ADJ_AX IBRD_ADM1_LOC_ADJ_BX IBRD_ADM1_LOC_ADJ_CX IBRD_ADM1_LOC_ADJ_EX IBRD_ADM1_LOC_ADJ_FX IBRD_ADM1_LOC_ADJ_JX IBRD_ADM1_LOC_ADJ_LX IBRD_ADM1_LOC_ADJ_TX IBRD_ADM1_LOC_ADJ_WX IBRD_ADM1_LOC_ADJ_YX IBRD_ADM1_LOC_ADJ IBRD_ADM1_LOC_AX IBRD_ADM1_LOC_BX IBRD_ADM1_LOC_CX IBRD_ADM1_LOC_EX IBRD_ADM1_LOC_FX IBRD_ADM1_LOC_JX IBRD_ADM1_LOC_LX IBRD_ADM1_LOC_TX IBRD_ADM1_LOC_WX IBRD_ADM1_LOC_YX IBRD_ADM1_LOC IBRD_ADM1_Wpop_ADJ IBRD_ADM1_Wpop_ADJ_AX IBRD_ADM1_Wpop_ADJ_BX IBRD_ADM1_Wpop_ADJ_CX IBRD_ADM1_Wpop_ADJ_EX IBRD_ADM1_Wpop_ADJ_FX IBRD_ADM1_Wpop_ADJ_JX IBRD_ADM1_Wpop_ADJ_LX IBRD_ADM1_Wpop_ADJ_TX IBRD_ADM1_Wpop_ADJ_WX IBRD_ADM1_Wpop_ADJ_YX IBRD_ADM1_Wpop IBRD_ADM1_Wpop_AX IBRD_ADM1_Wpop_BX IBRD_ADM1_Wpop_CX IBRD_ADM1_Wpop_EX IBRD_ADM1_Wpop_FX IBRD_ADM1_Wpop_JX IBRD_ADM1_Wpop_LX IBRD_ADM1_Wpop_TX IBRD_ADM1_Wpop_WX IBRD_ADM1_Wpop_YX{
replace `var'=0 if `var'==. 
}


* ChinaAid Data
//note on 07.09.2017: Do not fill ChinaAid with 0 if missing, as only available between 2000 and 2012.   CODA_ADM1_LOC_ADJ CODA_ADM1_Wpop_ADJ CODA_ADM1_LOC CODA_ADM1_Wpop COOF_ADM1_LOC_ADJ COOF_ADM1_Wpop_ADJ COOF_ADM1_LOC COOF_ADM1_Wpop
foreach var in CODA_ADM1_LOC_ADJ CODA_ADM1_Wpop_ADJ CODA_ADM1_LOC CODA_ADM1_Wpop COOF_ADM1_LOC_ADJ COOF_ADM1_Wpop_ADJ COOF_ADM1_LOC COOF_ADM1_Wpop {
replace `var'=0 if `var'==. & transaction_year>=2000 & transaction_year<=2012
}

* Indian Aid Data
foreach var in  IODA_ADM1_LOC_ADJ IOOF_ADM1_LOC_ADJ IODA_ADM1_Wpop_ADJ IOOF_ADM1_Wpop_ADJ IODA_ADM1_Wpop IOOF_ADM1_Wpop IODA_ADM1_LOC IOOF_ADM1_LOC {
replace `var'=0 if `var'==. & transaction_year>=2006 & transaction_year<=2014
}
*/

* 3-FillUp-Conflict)
* xxx Lennart 09.03.2018: I just checked the GED codebook. Missing means zero except for Syria ("The dataset contains 135 181 events. GED 17.1 is a global dataset that covers the entirety of the Globe (excluding Syria) between 1989-01-01 and 2016-12-31.") 
* GED Conflict Data
/*foreach var in   best_est_adm1_adj_ethnic best_est_adm1_adj_nonethnic best_est_adm1_adj best_est_adm1_ethnic best_est_adm1_nonethnic best_est_adm1 best_est_t1_adm1_adj_ethnic best_est_t2_adm1_adj_ethnic best_est_t3g_adm1_adj_ethnic best_est_t3ng_adm1_adj_ethnic best_est_t1_adm1_adj_nonethnic best_est_t2_adm1_adj_nonethnic best_est_t3g_adm1_adj_nonethnic  /// 
best_est_t3ng_adm1_adj_nonethnic best_est_t1_adm1_adj best_est_t2_adm1_adj best_est_t3g_adm1_adj best_est_t3ng_adm1_adj  best_est_t1_adm1_ethnic best_est_t2_adm1_ethnic ///
best_est_t3g_adm1_ethnic best_est_t3ng_adm1_ethnic best_est_t1_adm1_nonethnic best_est_t2_adm1_nonethnic best_est_t3g_adm1_nonethnic best_est_t3ng_adm1_nonethnic best_est_t1_adm1 best_est_t2_adm1 /// 
  best_est_t3g_adm1  best_est_t3ng_adm1 { */
  
foreach var in brd_adm1 brd_t1_adm1 brd_t2_adm1 brd_t3g_adm1 brd_t3ng_adm1 brd_t1_adm1_eth brd_t2_adm1_eth brd_t3g_adm1_eth ///
brd_t3ng_adm1_eth inci_t1_adm1 inci_t2_adm1 inci_t3g_adm1 inci_t3ng_adm1 inci_t1_adm1_eth inci_t2_adm1_eth inci_t3g_adm1_eth inci_t3ng_adm1_eth {
//high_est_adm1_adj_ethnic high_est_adm1_adj_nonethnic high_est_adm1_adj low_est_adm1_adj_ethnic low_est_adm1_adj_nonethnic low_est_adm1_adj high_est_adm1_ethnic low_est_adm1_ethnic high_est_adm1_nonethnic low_est_adm1_nonethnic low_est_adm1 high_est_adm1  low_inten_adm1_adj low_inten_adm1_adj_ethnic low_inten_adm1_adj_nonethnic high_inten_adm1_adj high_inten_adm1_adj_ethnic high_inten_adm1_adj_nonethnic high_inten_adm1 low_inten_adm1 high_inten_adm1_ethnic low_inten_adm1_ethnic high_inten_adm1_nonethnic low_inten_adm1_nonethnic
replace `var'=0 if `var'==. & ADM0!="Syria"
replace `var'=. if ADM0=="Syria"
}

* Fill Up SCAD Data
* XXX Lennart 25.07.2018: Fill up missings in SCAD, but not for small countries as SCAD only codes countries with pop>1 Mio in 1990. Also do not fill up for Mauritius as it did not have 1 Mio inhabitants in 1990
foreach var in N_demo N_riot N_strike N_pro_gov_violence N_anti_gov_violence N_extra_gov_violence N_intra_gov_violence ///
N_total_violence D_demo D_riot D_strike D_pro_gov_violence D_anti_gov_violence D_extra_gov_violence D_intra_gov_violence D_total_violence N_pro_gov_vio_leth N_pro_gov_vio_nleth D_pro_gov_vio_leth D_pro_gov_vio_nleth N_total_rds D_total_rds N_repress_all N_repress_nl N_repress_l D_repress_all D_repress_nl D_repress_l  N_repress_all_no89 N_repress_nl_no89 N_repress_l_no89 D_repress_all_no89 D_repress_nl_no89 D_repress_l_no89 {
replace `var'=0 if `var'==. & d_smallcountry==0
}




/*
* 3-FillUp-Controls
* xxx Lennart 09.03.2018: Maybe we do not need to fill up the PRIO controls anymore as they are only missing in 2013 and 2014, for which we apparently have also no WB Aid. Hence, we could only check if we need to replace observations for lights.
* Controls (HERE WE ARE NOT COMPLETELY SURE IF MISSINGS RELATE TO ZEROS: WE SHOULD CONSIDER ANOTHER SOLUTION)
foreach var in Population_ADM1_ADJ MEAN landarea ttime_mean excluded prec_gpcc temp droughtstart_spi droughtend_spi petroleum_s diamsec_s diamprim_s goldplacer_s goldsurface_s goldvein_s gem_s petroleum_y diamsec_y diamprim_y goldplacer_y goldsurface_y goldvein_y gem_y drug_y dist_capital isum_pop {
replace `var'=0 if `var'==.
}

*/

*xxx Melvin 09.03.2018: Lennart,  can we put this section (DAC List) into the create do file?
*xxx Lennart 09.03.2018: I think it would be cleaner to not drop data in the merge file. However, I would think that it is still safer to do this step in the end of all merging. ///
* Otherwise, we would have to do it in all create steps and might still miss some regions due to encoding of country names. Alternatively, we would have to go via the numeric ID_ADM0.
********************************
* Clean Data from Non-Recipients
********************************
********************************************************************************

***Kai Why do we keep only DAC countries here?
* Replace Regions, which became independent or got separated due to other reasons after 1995
replace DAC_1995=1 if ADM0=="South Sudan"
replace DAC_1995=1 if ADM0=="Kosovo"
replace DAC_1995=1 if ADM0=="Kuwait"
replace DAC_1995=1 if ADM0=="Macedonia"
replace DAC_1995=1 if ADM0=="Serbia"
replace DAC_1995=1 if ADM0=="St Vincent & Grenadines"
replace DAC_1995=1 if ADM0=="St Lucia"

* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
*drop DAC


* Merge with Polity IV data for Africa
gen scode=ISO3
replace scode="ANG" if ADM0=="Angola"
replace scode="BUI" if ADM0=="Burundi"
replace scode="BFO" if ADM0=="Burkina Faso"
replace scode="BOT" if ADM0=="Botswana"
replace scode="CEN" if ADM0=="Central African Republic"
replace scode="IVO" if ADM0=="Côte d'Ivoire"
replace scode="CAO" if ADM0=="Cameroon"
replace scode="ZAI" if ADM0=="Congo, Dem. Rep."
replace scode="CON" if ADM0=="Congo, Rep."
replace scode="ALG" if ADM0=="Algeria"
replace scode="ETI" if ADM0=="Ethiopia"
replace scode="GUI" if ADM0=="Guinea"
replace scode="GAM" if ADM0=="Gambia"
replace scode="LIB" if ADM0=="Libya"
replace scode="LES" if ADM0=="Lesotho"
replace scode="MOR" if ADM0=="Morocco"
replace scode="MAG" if ADM0=="Madagascar"
replace scode="MZM" if ADM0=="Mozambique"
replace scode="MAA" if ADM0=="Mauritania"
replace scode="MAS" if ADM0=="Mauritius"
replace scode="MAW" if ADM0=="Malawi"
replace scode="NIR" if ADM0=="Niger"
replace scode="NIG" if ADM0=="Nigeria"
replace scode="SIE" if ADM0=="Sierra Leone"
replace scode="CHA" if ADM0=="Chad"
replace scode="TOG" if ADM0=="Togo"
replace scode="TAZ" if ADM0=="Tanzania"
replace scode="ZAM" if ADM0=="Zambia"
replace scode="ZIM" if ADM0=="Zimbabwe"
replace scode="SAF" if ADM0=="South Africa"
drop _merge
sort scode year
merge m:1 scode year  using "$rawdata\Polity IV\p4v2017.dta", nogen keep(3) keepusing(polity2)


* Define Democracy coding
. gen democracy=0

. replace democracy=1 if polity2>6 & polity2<=10
label var democracy "African states with POLITY2>6"


* Merge with Bjornskov Rode Data
merge m:1 ISO3 transaction_year using "$data\Bjornskov & Rode\Bjørnskov-Rode-integrated-dataset-v2.2.dta", nogen keep(3) keepusing(democracy_br)
label var democracy_br "Democracy indicator by Bjornskov & Rode"



* Prepare IDs for XTSET
egen state=group(ADM0)
capture drop ID_adm1_num
egen ID_adm1_num=group(ID_adm1)
xtset ID_adm1_num transaction_year

* Merge with global conflict
merge m:1 transaction_year using brd_glob.dta, nogen keep(1 3)

label var brd_glob "Sum of all global BRD from UCDP's best estimate"
* Merge with further controls
merge m:1 transaction_year ADM0 using "$data\Xpolity\Xpolity.dta", keep(1 3) nogen
merge m:1 transaction_year ADM0 using "$data\FreedomHouse\press_freedom.dta", keep(1 3) nogen
 merge m:1 transaction_year ADM0 using  "$data\HRV_Transparency\transparency.dta", keep(1 3) nogen
 merge m:1 transaction_year ID_adm1 using "$data\BirthRegions\br.dta", keep(1 3) nogen

 * Fill-up birthregions
 replace birthregion=0 if birthregion==.
* Save the merged file
save "$data\Merge\IDA_Aid_GED_ADM1.dta", replace


* Add global trends

* Import S&P500 (Source: Yahoo Finance)
import excel using "$rawdata\Global Trends\S&P500.xlsx", clear firstrow
gen transaction_year=year(Date )
* drop 1994 observation
drop if transaction_year==1994
destring Open, replace
collapse (mean) Open, by(transaction_year)
rename Open snp500
label var snp500 "S&P500 Index (opening)"
tempfile stock_index
save `stock_index', replace

* Import Commodity Price Index (Source: FRED St Louis)
import excel using "$rawdata\Global Trends\FRED_Commodity_Index.xlsx", clear firstrow
gen transaction_year=year( observation_date )
collapse (mean) PALLFNFINDEXQ, by(transaction_year)
rename PALLFNFINDEXQ commodity_index
label var commodity_index "FRED St. Louis Commodity Price Index"
tempfile com_index
save `com_index', replace

* Import World GDP Data (Source: WDI)
import excel using "$rawdata\Global Trends\GDP_PPP_Curr_Dollar.xlsx", clear firstrow
keep if SeriesName=="GDP, PPP (current international $)"
keep if CountryName=="World"
reshape long y, i(CountryName) j(transaction_year)
rename y GDP_global
label var GDP_global "World GDP"
destring GDP_global, replace
keep GDP_global transaction_year
tempfile gdp_glob
save `gdp_glob', replace

* Import FDI (Source:WDI)
import excel using "$rawdata\Global Trends\WDI_FDI.xlsx", clear firstrow
keep if CountryName=="World"
drop CountryCode IndicatorName IndicatorCode AG
reshape long y, i(CountryName) j(transaction_year)
rename y FDI_global
label var FDI_global "Annual FDI outflows"
tempfile fdi_glob
save  `fdi_glob', replace

* Import Trade (Source: UN Comtrade)
import delimited using "$rawdata\Global Trends\comtrade.csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t1
save `t1', replace
import delimited using "$rawdata\Global Trends\comtrade(1).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t2
save `t2', replace
import delimited using "$rawdata\Global Trends\comtrade(2).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t3
save `t3', replace
import delimited using "$rawdata\Global Trends\comtrade(3).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t4
save `t4', replace
import delimited using "$rawdata\Global Trends\comtrade(4).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t5
save `t5', replace
append using `t1'
append using `t2'
append using `t3'
append using `t4'
tempfile trade_glob
save `trade_glob', replace

* Import global battle related deaths
import excel using "$rawdata\Global Trends\ucdp-brd-conf-181.xlsx", clear firstrow
collapse (sum) bdbest, by(year)
renvars bdbest year / brd_global transaction_year
tempfile brd_glob
save `brd_glob', replace


clear all
use "$data\Merge\IDA_Aid_GED_ADM1.dta", clear


merge m:1 transaction_year using `trade_glob', nogen keep(1 3)
merge m:1 transaction_year using `fdi_glob', nogen keep(1 3)
merge m:1 transaction_year using `gdp_glob', nogen keep(1 3)
merge m:1 transaction_year using `brd_glob', nogen keep(1 3)
merge m:1 transaction_year using `com_index', nogen keep(1 3)
merge m:1 transaction_year using `stock_index', nogen keep(1 3)



* Save the merged file
save "$data\Merge\IDA_Aid_GED_ADM1.dta", replace













*************************************************************************************
*************************               ADM2         ********************************
*************************************************************************************
global cwd "$data\Aid\2017_11_14_WB"
cd "$cwd"
*********************************
*** load world list of regions***
*********************************
use gadm2.dta, clear
drop if ADM1==""            //Small island states that have no adm1 units, may cause some regions from using not being merged. 
gen transaction_year=1995
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm2 transaction_year
	egen ID_adm2_num = group(ID_adm2)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm2_num transaction_year
	//Expand dataset to match time period from 1995-2012
	tsappend, add(19)
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm2_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* d_miss_ADM2 DAC_1995, replace 
	bysort ID_adm2_num (years_reverse): carryforward ID_adm* ADM* ISO3 ID_* d_miss_ADM2 DAC_1995, replace
	//Melvin H.L. Wong: 5. clean up
	drop years_reverse ID_adm2_num
	sort ID_adm2 transaction_year
save gadm2_full.dta, replace
*********************************
** 1-Merge-Aid Data)**
*********************************
use gadm2_full.dta, clear
* IDA
merge 1:1 ID_adm2 transaction_year using "IDA_disbursement_ADM2_tsfill.dta", nogen keepusing(WBAID_*) // 18 mismtach from using are few Aid disbursements not allocated to regions
merge 1:1 ID_adm2 transaction_year using "IDA_disbursement_ADM2_POP_tsfill.dta", nogen keepusing(WBAID_*)
* IBRD
merge 1:1 ID_adm2 transaction_year using "IBRD_disbursement_ADM2_tsfill.dta", nogen keepusing(IBRD_*)
merge 1:1 ID_adm2 transaction_year using "IBRD_disbursement_ADM2_POP_tsfill.dta", nogen keepusing(IBRD_*)
* China
merge 1:1 ID_adm2 transaction_year using "China_disbursement_ADM2_tsfill.dta", nogen keepusing(CAID_*)
merge 1:1 ID_adm2 transaction_year using "China_disbursement_ADM2_POP_tsfill.dta", nogen keepusing(CAID_*)
* India
merge 1:1 ID_adm2 transaction_year using "India_disbursement_ADM2_tsfill.dta", nogen keepusing(IAID_*)
merge 1:1 ID_adm2 transaction_year using "India_disbursement_ADM2_POP_tsfill.dta", nogen  keepusing(IAID_*)

*********************************
** 2-Merge-Conflict Data)**
*********************************
* GED Conflict Data 
merge 1:1 ID_adm2 transaction_year using "UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", nogen keep(1 3) //mismtach from using is soley due to time periods before 1995 and after 2014

**********************Merge with SCAD data********************************************
rename transaction_year year
* Lennart 25.07.2017: Some countries seem to be missing in SCAD (Cape Verde Congo DRC Djibouti  Guinea Bissau Liberia Malawi Mauritius ?)
merge 1:1 ID_adm2 year using "$data\dataprocessed/SCAD_Africa_adm2.dta", nogen keep(1 3)   //SCAD only for Africa, these are the _merge ==1 (master only)  //XXX Fill up missings = 0 (in Africa), check ///
rename year transaction_year
//mismatch from using is soley due to time periods before 1995 and after 2014



*********************************
** 3-Merge-Control Variables)**
*********************************
* Lennart: 24.07.2018 for all following four merges 200 empty and 20 observations from Monacco in master (_merge 1) are not merged --> potentially need to check the 200 empty observations
* elevation
merge m:1 ID_adm2 using "adm2elevation_cleaned.dta", nogen keepusing(elevation_*) //some mismatch from master due to small islands with no elevation data
* water bodies
merge m:1 ID_adm2 using "adm2lakesrivers_cleaned.dta", nogen keepusing(D*)
* borders
merge m:1 ID_adm2 using "adm2borders_cleaned.dta", nogen keepusing(D*)
* ocean access
merge m:1 ID_adm2 using "adm2oceans_cleaned.dta", nogen keepusing(D*)
/* xxx Melvin 09.03.2018: Not sure if we should have this. No data does not mean no elevation, for example. Different case for Aid
* xxx Lennart 09.03.2018: We lose 578 observations here. Those are mainly small islands like Malta, Maldives or cities like Monaco. Replacing them as zeros is indeed problematic and keeping them as missing is the better choice in my opinion.
foreach var in Dlake Driver Dborder Docean {
replace `var'=0 if `var'==.
}
*/


* Lennart: 24.07.2018: GREG Data on ADM2 level are still missing. Would we like to add them? 

* PRIO GRID Controlls
* Lennart 24.07.2018: Some observations for Monacco & Parcel Island & some empty (no ADM* identifiers) regions are not merged. 
merge 1:1 ID_adm2 transaction_year using "ADM2PRIOCONTROLS.dta", nogen

* Merge with Nightlights
merge 1:1 ID_adm2 transaction_year using "lights_adm2.dta", nogen keep(1 3) // Lennart 25.07.2018: Failed merges before 1995 and after 2012 (which is not in our main sample period)

* merge with small country indicator
merge m:1 ISO3 using "d_pop.dta", nogen keep(1 3) // 18 mismtach from using are few Aid disbursements not allocated to regions

* Distance to capital
merge m:1 ID_adm2 using "ADM2CAPITALCOORD.dta", nogen //missing from small islands or countries without ADM1 regions in our dataset

* Population
merge 1:1 ID_adm2 transaction_year using "pop_adm2.dta", nogen // failed merges refer to Monacco or parcel islands. other mismatches are out of the sampling period (either <1995 or >2012)
/*
mismatch from master is mostly due to years 2013 and 2014 (time dimension only relevant to India Aid)
mismatch from using does not exist
*/
* Merge with steel data for IV for Chinese Aid
merge m:1 transaction_year using "steel_iv.dta", nogen // missing due to time available from 1999-2013. No problem, as IV for ChinaAid only

* Merge with WB IV data
merge m:1 transaction_year using "alt_iv.dta", nogen // missing due to time from 1995-2012

* Merge with Chinese aggregate Trade and FDI
merge m:1 transaction_year using "chinese_fdi.dta", nogen keep(1 3) 
merge m:1 transaction_year using "chinese_exports.dta", nogen keep(1 3) 

* Merge with Bluhm et al. 2019 instrument factor 1
merge m:1 transaction_year using "$data\Bluhm et al 2019 - instrument\june_2019\bluhmetal2019_instrument.dta", nogen keep(1 3) 
* Merge with Bluhm et al. 2021 US steel production (does not contain data for 2014)
merge m:1 transaction_year using "$data\Bluhm et al 2019 - instrument\september_2021\bluhmetal2021_steel.dta", nogen keep(1 3) 
label var us_steel_indx "US Steel Production based on Series IPN3311A2BS by FRED St. Louis (Bluhm et al.)"

* Lennart 24.07.2018: Number of ethnic groups per region are not merged anymore. But would we need this, if we go for GREG?

***************************************************
************** Fill up Missings *******************
***************************************************

 *3-FillUp-Conflict)
* xxx Lennart 09.03.2018: I just checked the GED codebook. Missing means zero except for Syria ("The dataset contains 135 181 events. GED 17.1 is a global dataset that covers the entirety of the Globe (excluding Syria) between 1989-01-01 and 2016-12-31.") 
* GED Conflict Data
foreach var in  inci_t1_adm2 inci_t2_adm2 inci_t3g_adm2 inci_t3ng_adm2 inci_t1_adm2_eth inci_t2_adm2_eth inci_t3g_adm2_eth inci_t3ng_adm2_eth  brd_adm2 brd_t1_adm2 brd_t2_adm2 brd_t3g_adm2 brd_t3ng_adm2 brd_t1_adm2_eth brd_t2_adm2_eth brd_t3g_adm2_eth brd_t3ng_adm2_eth {
replace `var'=0 if `var'==. & ADM0!="Syria"
replace `var'=. if ADM0=="Syria"
}


* Fill Up SCAD Data
* XXX Lennart 25.07.2018: Fill up missings in SCAD, but not for small countries as SCAD only codes countries with pop>1 Mio in 1990. Also do not fill up for Mauritius as it did not have 1 Mio inhabitants in 1990
*foreach var in N_demo N_riot N_strike N_pro_gov_violence N_anti_gov_violence N_extra_gov_violence N_intra_gov_violence ///
*N_total_violence D_demo D_riot D_strike D_pro_gov_violence D_anti_gov_violence D_extra_gov_violence D_intra_gov_violence D_total_violence N_pro_gov_vio_leth N_pro_gov_vio_nleth D_pro_gov_vio_leth D_pro_gov_vio_nleth N_total_rds D_total_rds{
*replace `var'=0 if `var'==. & d_smallcountry==0 & ADM0!="Mauritius" & ADM0!="Congo, Dem. Rep." & ADM0!="Ethiopia" & ADM0!="Guinea-Bissau" & ADM0!="Kenya" & ADM0!="Lesotho" & ADM0!="Liberia" & ADM0!="Libya" & ADM0!="Malawi"  ///
* & ADM0!="Nigeria" & ADM0!="South Sudan" & ADM0!="Sudan"
*}


foreach var in N_demo N_riot N_strike N_pro_gov_violence N_anti_gov_violence N_extra_gov_violence N_intra_gov_violence ///
N_total_violence D_demo D_riot D_strike D_pro_gov_violence D_anti_gov_violence D_extra_gov_violence D_intra_gov_violence D_total_violence {
replace `var'=0 if `var'==. 
}




*xxx Melvin 09.03.2018: Lennart,  can we put this section (DAC List) into the create do file?
*xxx Lennart 09.03.2018: I think it would be cleaner to not drop data in the merge file. However, I would think that it is still safer to do this step in the end of all merging. ///
* Otherwise, we would have to do it in all create steps and might still miss some regions due to encoding of country names. Alternatively, we would have to go via the numeric ID_ADM0.
********************************
* Clean Data from Non-Recipients
********************************
********************************************************************************
* Replace Regions, which became independent or got separated due to other reasons after 1995
replace DAC_1995=1 if ADM0=="South Sudan"
replace DAC_1995=1 if ADM0=="Kosovo"
replace DAC_1995=1 if ADM0=="Kuwait"
replace DAC_1995=1 if ADM0=="Macedonia"
replace DAC_1995=1 if ADM0=="Serbia"
replace DAC_1995=1 if ADM0=="St Vincent & Grenadines"
replace DAC_1995=1 if ADM0=="St Lucia"


* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
*drop DAC



* Merge with Polity IV data for Africa
gen scode=ISO3
replace scode="ANG" if ADM0=="Angola"
replace scode="BUI" if ADM0=="Burundi"
replace scode="BFO" if ADM0=="Burkina Faso"
replace scode="BOT" if ADM0=="Botswana"
replace scode="CEN" if ADM0=="Central African Republic"
replace scode="IVO" if ADM0=="Côte d'Ivoire"
replace scode="CAO" if ADM0=="Cameroon"
replace scode="ZAI" if ADM0=="Congo, Dem. Rep."
replace scode="CON" if ADM0=="Congo, Rep."
replace scode="ALG" if ADM0=="Algeria"
replace scode="ETI" if ADM0=="Ethiopia"
replace scode="GUI" if ADM0=="Guinea"
replace scode="GAM" if ADM0=="Gambia"
replace scode="LIB" if ADM0=="Libya"
replace scode="LES" if ADM0=="Lesotho"
replace scode="MOR" if ADM0=="Morocco"
replace scode="MAG" if ADM0=="Madagascar"
replace scode="MZM" if ADM0=="Mozambique"
replace scode="MAA" if ADM0=="Mauritania"
replace scode="MAS" if ADM0=="Mauritius"
replace scode="MAW" if ADM0=="Malawi"
replace scode="NIR" if ADM0=="Niger"
replace scode="NIG" if ADM0=="Nigeria"
replace scode="SIE" if ADM0=="Sierra Leone"
replace scode="CHA" if ADM0=="Chad"
replace scode="TOG" if ADM0=="Togo"
replace scode="TAZ" if ADM0=="Tanzania"
replace scode="ZAM" if ADM0=="Zambia"
replace scode="ZIM" if ADM0=="Zimbabwe"
replace scode="SAF" if ADM0=="South Africa"
rename transaction_year year
sort scode year
merge m:1 scode year  using "$rawdata\Polity IV\p4v2017.dta", nogen keep(3) keepusing(polity2)


* Define democracy coding
. gen democracy=0

. replace democracy=1 if polity2>6 & polity2<=10
label var democracy "African states with POLITY2>6"

* Merge with Bjornskov Rode Data
merge m:1 ISO3 transaction_year using "$data\Bjornskov & Rode\Bjørnskov-Rode-integrated-dataset-v2.2.dta",  nogen keep(3) keepusing(democracy_br)
label var democracy_br "Democracy indicator by Bjornskov & Rode"

* Prepare IDs for XTSET
cap drop state
egen state=group(ADM0)
capture drop ID_adm2_num
egen ID_adm2_num=group(ID_adm2)
xtset ID_adm2_num transaction_year

rename year transaction_year 

* Merge with global conflict
merge m:1 transaction_year using `brd_glob', nogen keep(1 3)
label var brd_glob "Sum of all global BRD from UCDP's best estimate"

* Save the merged file
save "$data\Merge\IDA_Aid_GED_ADM2.dta", replace



